metabolic_data = read.csv('Metabolism Data Prob 26.csv')
metabolic_data$Mass_mod = metabolic_data$Mass^(0.75)
summary(metabolic_data)
## CommonName Species
## Mouse : 4 Antechinomus_laniger : 1
## Rat : 4 Antechinomus_stuartii : 1
## Woodrat : 4 Antechinus_macdonnellensis: 1
## Rodent : 3 Antilocapra_americana : 1
## Australian_marsupial: 2 Blarina_brevicauda : 1
## Bat : 2 Bradypus_variegatus : 1
## (Other) :76 (Other) :89
## Mass Metab Life Mass_mod
## Min. : 0.0036 Min. : 5.17 Min. : 0.8 Min. : 0.0147
## 1st Qu.: 0.0990 1st Qu.: 39.05 1st Qu.: 3.5 1st Qu.: 0.1763
## Median : 1.9800 Median : 320.00 Median : 8.0 Median : 1.6692
## Mean : 69.6215 Mean : 5173.49 Mean :12.3 Mean : 14.3155
## 3rd Qu.: 11.9000 3rd Qu.: 1695.00 3rd Qu.:15.5 3rd Qu.: 6.4044
## Max. :3000.0000 Max. :165000.00 Max. :75.0 Max. :405.3600
##
Assess relationship between mass metabolic rate:
{meta|mass}: meta = B0 + B1(mass)
or
{meta|mass_.75}: meta = B0 + B1(mass)
ggplot(data=metabolic_data) +
geom_point(aes(x=Mass, y=Metab), color='Red') +
geom_point(aes(x=Mass_mod, y=Metab), color='Blue')
Trend is stronger for X^3/4
mass_model = lm(data=metabolic_data, formula = Metab ~ Mass_mod)
summary(mass_model)
##
## Call:
## lm(formula = Metab ~ Mass_mod, data = metabolic_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11712.7 -117.4 368.5 474.3 6598.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -481.346 213.467 -2.255 0.0265 *
## Mass_mod 395.016 4.299 91.895 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1992 on 93 degrees of freedom
## Multiple R-squared: 0.9891, Adjusted R-squared: 0.989
## F-statistic: 8445 on 1 and 93 DF, p-value: < 2.2e-16
bird_plot = ggplot(metabolic_data, aes(x=Mass_mod, y=Metab)) +
geom_point(color='Blue', size = 2) +
geom_smooth(method = lm, se=TRUE, level=0.99, color='Red')
predictions_cl = predict(mass_model, newdata = data.frame(Mass_mod = sort(metabolic_data$Mass_mod)), interval = c('predict'), type=c('response'), level=0.99)
regress_cl = predict(mass_model, newdata = data.frame(Mass_mod = sort(metabolic_data$Mass_mod)), interval = c('confidence'), type=c('response'), level=0.99)
bird_plot +
geom_line(data=data.frame(predictions_cl), aes(x=sort(metabolic_data$Mass_mod), y=upr), color='Green') +
geom_line(data=data.frame(predictions_cl), aes(x=sort(metabolic_data$Mass_mod), y=lwr), color='Green') +
geom_line(data=data.frame(regress_cl), aes(x=sort(metabolic_data$Mass_mod), y=upr), color='Blue') +
geom_line(data=data.frame(regress_cl), aes(x=sort(metabolic_data$Mass_mod), y=lwr), color='Blue')
plot(mass_model)
log_data = metabolic_data
log_data$Meta_log = log(log_data$Metab)
log_data$Mass_mod_log = log(log_data$Mass_mod)
ggplot(data=log_data) +
geom_point(aes(x=Mass_mod_log, y=Meta_log), color='Red') +
geom_point(aes(x=Mass_mod_log, y=Meta_log), color='Blue')
log_model = lm(data=log_data, formula = Meta_log ~ Mass_mod_log)
bird_plot = ggplot(log_data, aes(x=Mass_mod_log, y=Meta_log)) +
geom_point(color='Blue', size = 2) +
geom_smooth(method = lm, se=TRUE, level=0.99, color='Red')
predictions_cl = predict(log_model, newdata = data.frame(Mass_mod_log = sort(log_data$Mass_mod_log)), interval = c('predict'), type=c('response'), level=0.99)
regress_cl = predict(log_model, newdata = data.frame(Mass_mod_log = sort(log_data$Mass_mod_log)), interval = c('confidence'), type=c('response'), level=0.99)
bird_plot +
geom_line(data=data.frame(predictions_cl), aes(x=sort(log_data$Mass_mod_log), y=upr), color='Green') +
geom_line(data=data.frame(predictions_cl), aes(x=sort(log_data$Mass_mod_log), y=lwr), color='Green') +
geom_line(data=data.frame(regress_cl), aes(x=sort(log_data$Mass_mod_log), y=upr), color='Blue') +
geom_line(data=data.frame(regress_cl), aes(x=sort(log_data$Mass_mod_log), y=lwr), color='Blue')
m<-mean(log_model$residuals)
std<-sqrt(var(log_model$residuals))
hist(log_model$residuals, breaks=20, prob=TRUE,
xlab="x-variable", ylim=c(0, 2),
main="normal curve over histogram")
curve(dnorm(x, mean=m, sd=std),
col="darkblue", lwd=2, add=TRUE, yaxt="n")
plot(log_model)
Meets assumptions for normality, linearity, equal variance and independence
Outliers are not an issue according to Cookd measures
summary(log_model)
##
## Call:
## lm(formula = Meta_log ~ Mass_mod_log, data = log_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.14216 -0.26466 -0.04889 0.25308 1.37616
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.63833 0.04709 119.73 <2e-16 ***
## Mass_mod_log 0.98499 0.01949 50.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4572 on 93 degrees of freedom
## Multiple R-squared: 0.9649, Adjusted R-squared: 0.9645
## F-statistic: 2553 on 1 and 93 DF, p-value: < 2.2e-16
confint(log_model)
## 2.5 % 97.5 %
## (Intercept) 5.5448128 5.731849
## Mass_mod_log 0.9462828 1.023700
{log(Meta):log(Mass)}: B0 + B1log(Mass) = 5.64 + 0.98log(Mass)
A doubling of the mass corresponds to a 98% increase in the expected median of Metabolic activity
A 95% confidence interval for this effect is [94%, 102%]
96% of the variability is explained by the model
autism_data = read.csv('Autism Data Prob 29.csv')
summary(autism_data)
## Year Prevalence
## Min. :1992 Min. : 3.50
## 1st Qu.:1994 1st Qu.: 5.30
## Median :1996 Median : 7.80
## Mean :1996 Mean : 9.34
## 3rd Qu.:1998 3rd Qu.:11.80
## Max. :2000 Max. :18.30
autism_model = lm(data=autism_data, formula = Prevalence ~ Year)
summary(autism_model)
##
## Call:
## lm(formula = Prevalence ~ Year, data = autism_data)
##
## Residuals:
## 1 2 3 4 5
## 1.38 -0.43 -1.54 -1.15 1.74
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3593.440 540.858 -6.644 0.00695 **
## Year 1.805 0.271 6.661 0.00690 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.714 on 3 degrees of freedom
## Multiple R-squared: 0.9367, Adjusted R-squared: 0.9156
## F-statistic: 44.37 on 1 and 3 DF, p-value: 0.006897
plot(autism_model)
ggplot(data=autism_data) +
geom_point(aes(x=Year, y=Prevalence), color='Red') +
geom_point(aes(x=Year, y=Prevalence), color='Blue')
autism_model = lm(data=autism_data, formula = Prevalence ~ Year)
bird_plot = ggplot(autism_data, aes(x=Year, y=Prevalence)) +
geom_point(color='Blue', size = 2) +
geom_smooth(method = lm, se=TRUE, level=0.99, color='Red')
predictions_cl = predict(autism_model, newdata = data.frame(Year = sort(autism_data$Year)), interval = c('predict'), type=c('response'), level=0.99)
regress_cl = predict(autism_model, newdata = data.frame(Year = sort(autism_data$Year)), interval = c('confidence'), type=c('response'), level=0.99)
bird_plot +
geom_line(data=data.frame(predictions_cl), aes(x=sort(autism_data$Year), y=upr), color='Green') +
geom_line(data=data.frame(predictions_cl), aes(x=sort(autism_data$Year), y=lwr), color='Green') +
geom_line(data=data.frame(regress_cl), aes(x=sort(autism_data$Year), y=upr), color='Blue') +
geom_line(data=data.frame(regress_cl), aes(x=sort(autism_data$Year), y=lwr), color='Blue')
m<-mean(autism_model$residuals)
std<-sqrt(var(autism_model$residuals))
hist(autism_model$residuals, breaks=20, prob=TRUE,
xlab="x-variable", ylim=c(0, 2),
main="normal curve over histogram")
curve(dnorm(x, mean=m, sd=std),
col="darkblue", lwd=2, add=TRUE, yaxt="n")
plot(autism_model)
Log Linear
log_data = autism_data
log_data$Prevalence_log = log(log_data$Prevalence)
autism_model = lm(data=log_data, formula = Prevalence_log ~ Year)
summary(autism_model)
##
## Call:
## lm(formula = Prevalence_log ~ Year, data = log_data)
##
## Residuals:
## 1 2 3 4 5
## 0.004578 0.008655 -0.015795 -0.012686 0.015248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.080e+02 4.953e+00 -82.38 3.94e-06 ***
## Year 2.054e-01 2.481e-03 82.79 3.88e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01569 on 3 degrees of freedom
## Multiple R-squared: 0.9996, Adjusted R-squared: 0.9994
## F-statistic: 6855 on 1 and 3 DF, p-value: 3.884e-06
plot(autism_model)
ggplot(data=log_data) +
geom_point(aes(x=Year, y=Prevalence_log), color='Red') +
geom_point(aes(x=Year, y=Prevalence_log), color='Blue')
autism_model = lm(data=log_data, formula = Prevalence_log ~ Year)
bird_plot = ggplot(log_data, aes(x=Year, y=Prevalence_log)) +
geom_point(color='Blue', size = 2) +
geom_smooth(method = lm, se=TRUE, level=0.99, color='Red')
predictions_cl = predict(autism_model, newdata = data.frame(Year = sort(log_data$Year)), interval = c('predict'), type=c('response'), level=0.99)
regress_cl = predict(autism_model, newdata = data.frame(Year = sort(log_data$Year)), interval = c('confidence'), type=c('response'), level=0.99)
bird_plot +
geom_line(data=data.frame(predictions_cl), aes(x=sort(log_data$Year), y=upr), color='Green') +
geom_line(data=data.frame(predictions_cl), aes(x=sort(log_data$Year), y=lwr), color='Green') +
geom_line(data=data.frame(regress_cl), aes(x=sort(log_data$Year), y=upr), color='Blue') +
geom_line(data=data.frame(regress_cl), aes(x=sort(log_data$Year), y=lwr), color='Blue')
m<-mean(autism_model$residuals)
std<-sqrt(var(autism_model$residuals))
hist(autism_model$residuals, freq = TRUE, breaks=20)
curve(dnorm(x, mean=m, sd=std),
col="darkblue", lwd=2, add=TRUE, yaxt="n")
plot(autism_model)
Still very hard to run a regression for such a small sample - but that’s the assigment… sooooo
Meets assumptions for normality, linearity, equal variance and independence
Outliers are not an issue according to Cookd measures
{log(Prevalence):Year)}: B0 + B1Year = -408 + 0.020Year
A multiplicative effect of year on prevalance of 23% is predicted by the model
A 95% confidence interval for this effect is [22%, 24%]
99% of the variability is explained by the model
(note that there is no reason to suspect that year should be a part of this model, model is nonsense)